home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
IRIX Installation Tools & Overlays 2002 November
/
SGI IRIX Installation Tools & Overlays 2002 November - Disc 4.iso
/
dist
/
infosearch.idb
/
usr
/
lib
/
infosearch
/
bin
/
sgindexAdmin.z
/
sgindexAdmin
Wrap
Text File
|
2002-10-15
|
22KB
|
892 lines
#!/usr/bin/perl
#
# Copyright 1996-2002, Silicon Graphics, Inc.
# All Rights Reserved.
#
# This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics, Inc.;
# the contents of this file may not be disclosed to third parties, copied or
# duplicated in any form, in whole or in part, without the prior written
# permission of Silicon Graphics, Inc.
#
# RESTRICTED RIGHTS LEGEND:
# Use, duplication or disclosure by the Government is subject to restrictions
# as set forth in subdivision (c)(1)(ii) of the Rights in Technical Data
# and Computer Software clause at DFARS 252.227-7013, and/or in similar or
# successor clauses in the FAR, DOD or NASA FAR Supplement. Unpublished -
# rights reserved under the Copyright Laws of the United States.
#
# requires perl5
#
# ------------
# sgindexAdmin
# ------------
#
# see man sgindexAdmin(1)
#
# Manages merging and synchronization of infosearch fulltext
# indices and creation of tables of contents.
#
# sgindexAdmin -toc <optional content domain name>
#
# Create HTML table of contents
# from the fulltext index title files.
#
# sgindexAdmin -update <optional content domain name>
#
# Normally run as an exitop; updates database(s)
# Also performs the "-toc" phase
#
# sgindexAdmin -instchk <optional content domain name>
#
# Same as update, except we just check
# the indices dates vs. the inst history
# files to see if new products have been
# installed. Invoked from /etc/init.d/configmsg
# If new products have been installed, run update.
# Also updates the man whatis database from our
# man title file.
#
# sgindexAdmin -clean <optional content domain name>
#
# Remove local index & table of
# contents (run as a removeop).
#
require 5;
#----------------------------
# initialize global variables
#
# All state is kept in package globals (perl4-style script); the subs
# below read these directly rather than taking them as arguments.
#
umask 022;
@domainList = ();               # ordered list of content domains (man, relnotes, bks)
%tocProgramList = ();           # domain -> program that builds the HTML TOC
%domainPathList = ();           # domain -> writable SGIindex directory
%searchPath = ();               # domain -> space-separated dirs to scan
%altProg = ();                  # domain -> program producing extra/missing file lists
%indexProg = ();                # domain -> program producing word lists
%contentExpr = ();              # domain -> find(1) regex selecting content files
$ENV{'SGINDEXADMIN'} = "TRUE";  # lets child tools know they run under us
$tmpdir = $ENV{'TMPDIR'} || "/tmp";
$root = "/"; # -r domain root | $root="/disk2"
$verbose = 0; # -v
$nfs = "-local"; # -nfs
$force = 0; # -force
$instchk = 0; # -instchk
$output = ">/dev/null"; # set to NULL if $verbose==1
$timex = ""; # set to "timex" if -t
$keepwrd = 0; # delete wrd file when done?
$clean_info = 0; # remove some op files
#--------------------------------------------
# add a -r -root to override default settings
#
# Pre-scan a COPY of @ARGV so $root is known before &readConfig runs;
# the real option loop below skips -r's value when it reaches it.
#
@TMP = @ARGV;
while ($arg = shift(@TMP)) {
    if ($arg eq "-r") {
	$root = shift(@TMP);
    }
}
&readConfig();
#-------------------------
# no args, print usage msg
#
if ($#ARGV == -1) {
    &usage();
    exit(1);
}
#-----------------------------------------------------------
# clean up tmp files if the program is interrupted or killed
#
# NOTE(review): SIGKILL cannot actually be trapped by any process;
# the $SIG{'KILL'} assignment is harmless but has no effect.
#
$SIG{'INT'} = 'tmpFileCleanup';
$SIG{'QUIT'} = 'tmpFileCleanup';
$SIG{'KILL'} = 'tmpFileCleanup';
$SIG{'ABRT'} = 'tmpFileCleanup';
#----------------------
# examine cmd line args
#
# Options are processed strictly left to right, so mode flags (-v, -t,
# -nfs, -force, -r) only affect actions (-toc, -update, ...) that
# appear AFTER them on the command line.
#
while($arg = shift(@ARGV)){
    if ($arg eq "-v") {			# verbose mode
	$verbose = 1;
	$output = "";
    } elsif ($arg eq "-t") {		# report timing stats
	$timex = "timex";
    } elsif ($arg eq "-nfs") {		# remove -local arg from find
	$nfs = "";
    } elsif ($arg eq "-force") {	# force creation of database(s)
	$force = 1;
    } elsif ($arg eq "-r") {		# database(s) root; already handled
	shift(@ARGV);			# discard the value consumed by the pre-scan
    } elsif ($arg eq "-missing") {	# no-op; just use -update
	;
    } elsif ($arg eq "-extra") {	# no-op; just use -update
	;
    } elsif ($arg eq "-toc") {		# create browsable files for db
	&makeDomainTOC(shift(@ARGV));
    } elsif ($arg eq "-update") {	# perform database update
	$nextarg = shift(@ARGV);
	&updateDomainIndex($nextarg);
    } elsif ($arg eq "-instchk") {	# perform database update (check inst)
	$instchk = 1;
	&instChkDomainUpdate(shift(@ARGV));
    } elsif ($arg eq "-clean") {	# clean up non-inst local files: exito
	$clean_info = 1;
	&cleanDomainIndex(shift(@ARGV));
    } elsif ($arg eq "-check") {	# no-op
	;
    } elsif ($arg eq "-keepwrd") {	# keep the intermediate .wrd file
	$keepwrd = 1;
    } else {
	&usage();
	exit(1);
    }
}
&tmpFileCleanup();
exit(0);
#------
# usage
#
#------
# usage
#
# Print the one-line usage synopsis to STDOUT.
#
sub usage {
    my($synopsis) = "Usage:\n\tsgindexAdmin -update {[bks | relnotes | man]}\n";
    print $synopsis;
}
#-----------
# readConfig
#
#-----------
# readConfig
#
# Parse the configuration records that follow __END__ (read through the
# DATA filehandle; this replaces the old sgindexAdmin.config file) and
# populate the per-domain global tables: @domainList, %tocProgramList,
# %indexProg, %altProg, %searchPath, %domainPathList, %contentExpr.
#
# Side effects: calls &readISConfig (may reset $root / $coll) and
# creates a missing "SGIindex" directory under the first usable search
# path of each domain (dies on mkdir failure).
#
sub readConfig {
    my($domain) = "";
    my($var,$value) = "";
    # no root, so check the value of infosrch.cfg for collection path
    #
    &readISConfig();
    while(<DATA>){
	next if /^\#/;	# Skip comments
	next if /^$/;	# Skip blank lines
	# Each record is "VAR = value"; VAR is all-caps, value runs to
	# the first "#" or end of line.
	if(/\s*([A-Z]+)\s*=\s*([^\#\n]+)/){
	    $var = $1;
	    $value = $2;
	    $value =~ s/\s+$//;	# Remove trailing spaces
	    if($var eq "DOMAIN"){
		# A DOMAIN line opens a new record; following keys
		# attach to this domain until the next DOMAIN line.
		push(@domainList, $value);
		$domain=$value;
	    }elsif($var eq "TOCPROG") {
		$tocProgramList{"$domain"}=$value;
	    }elsif($var eq "IDXPROG") {
		$indexProg{"$domain"}=$value;
	    }elsif($var eq "ALTPROG") {
		$altProg{"$domain"}=$value;
	    }elsif($var eq "INFOPATH") {
		# Form "$ENVVAR else fallback dirs": use the (colon-
		# separated) environment variable when set, otherwise
		# the space-separated fallback list.
		if($value=~/\$([A-Z]+)\s+else\s+(.+)/){
		    my($envpath)=$1;
		    my($fbpath)=$2;
		    unless($ENV{$envpath}){
			@searchPath=split(/\s+/, $fbpath);
		    }else{
			@searchPath=split(/:/, $ENV{$envpath});
		    }
		}else{
		    @searchPath=split(/\s+/, $value);
		}
		my($goodPath) = "";
		foreach (@searchPath){
		    my($indexdir) = "";
		    # The domain Doc Root is prepended to the
		    # ENV SEARCH PATH
		    # Only add dirs which exist (else find(1) error)
		    if (-e &redirectedPath($_)) {
			$tmpPath = &redirectedPath($_);
			# checkPath() rejects "/", ".", and friends so
			# we never index (or rm under) the filesystem root.
			if (checkPath($tmpPath) == 1) {
			    $goodPath = "$goodPath " . $tmpPath;
			    # First acceptable path wins as the index home.
			    unless($domainPathList{"$domain"}){
				$indexdir = $tmpPath . "/SGIindex";
				# If the index dir doesn't exist, create it.
				# Otherwise, database gets built in "/"
				#
				if (!(-e $indexdir)
				    &&
				    !(mkdir($indexdir, 0755)) ) {
				    die "mkdir ($indexdir) failed: $!\n";
				}
				if (-w $indexdir ){
				    $domainPathList{"$domain"}=$indexdir;
				}
			    }
			}
		    }
		}
		$searchPath{"$domain"} = $goodPath;
	    }elsif($var eq "INFOREGEX") {
		$contentExpr{"$domain"}=$value;
	    }else{
		# skip it;
	    }
	}
    }
    close(DATA);
}
#-------------
# readISConfig
#
# - parse cfg file
# - set $root if it's not already set
# - set collection id
#
#-------------
# readISConfig
#
# - parse cfg file (/usr/lib/infosearch/C/infosrch.cfg)
# - set $root if it's not already set (i.e. no -r on the command line)
# - set collection id ($coll, used later by makeBrowseTOC)
#
# Returns quietly when the cfg file is absent or any stat fails;
# in that case $coll stays undefined and $root keeps its value.
#
sub readISConfig {
    my($cfg) = "/usr/lib/infosearch/C/infosrch.cfg";
    open(CFG, "$cfg") or return;
    while (<CFG>) {
	if (/\<COLLECTION(.*)\>/) {
	    # Reset all three fields for this record.  (The original
	    # "$title, $path, $id = undef" was a comma-operator bug
	    # that only reset $id.)
	    ($title, $path, $id) = (undef, undef, undef);
	    # Allow optional whitespace around "="; the original used
	    # \w* here, which rejected entries written as KEY = "...".
	    $title = $1 if (/TITLE\s*=\s*"([^"]+)"/);
	    $path = $1 if (/PATH\s*=\s*"([^"]+)"/);
	    $id = $1 if (/ID\s*=\s*"([^"]+)"/);
	    # store title, path and collection
	    push @cfgtitle, $title;
	    push @cfgpath, $path;
	    push @cfgid, $id;
	}
    }
    close(CFG);
    # $root is first PATH in cfg unless it's been defined on the
    # command line with -r (default is "/").  The original test was
    # inverted ("ne") and clobbered a user-supplied -r root.
    #
    $root = $cfgpath[0]
	if ($root eq "/" && defined($cfgpath[0]) && $cfgpath[0] ne "");
    # find collection id for this $root; compare device/inode pairs so
    # differently-spelled or symlinked paths still match.
    #
    undef $coll;
    my($dev, $ino) = stat($root) or return;
    # (original loop condition used bareword "n", not "$n", which made
    # the test constant)
    for (my($n) = 0; $n <= $#cfgtitle; $n++) {
	my($d, $i) = stat($cfgpath[$n]) or return;
	if ($dev == $d && $ino == $i) {
	    $coll = $cfgid[$n];
	    return;
	}
    }
}
#-------------------------------------------------
# updateDomainIndex($domain) # All if $domain=NULL
#
#-------------------------------------------------
# updateDomainIndex($domain) # All if $domain=NULL
#
# Update a single named domain, or every configured domain when no
# argument is given.
#
sub updateDomainIndex {
    my($arg) = @_;
    if ($arg ne '') {
	&updateIndex($arg);
	return;
    }
    foreach my $d (@domainList) {
	&updateIndex($d);
    }
}
#---------------------
# updateIndex($domain)
#
#---------------------
# updateIndex($domain)
#
# Synchronize the fulltext index for one domain: delete entries for
# files that vanished from disk (sgmerge), index newly-installed files
# (buildIndex) into either the master or a "local<domain>" overlay
# database, and rebuild the HTML table of contents when anything
# changed.  "bks" is special-cased: its IDXPROG does all phases.
#
# Reads globals: $force, $verbose, $instchk, $root, %domainPathList,
# %searchPath, %altProg, %indexProg, %contentExpr, $tmpdir, $output,
# $timex.  May set $force=1 (scratch rebuild) as a side effect.
#
# NOTE(review): the "$!" in the die messages after system() calls is
# usually stale -- "$?" carries the child status; confirm before
# relying on those messages for diagnosis.
#
sub updateIndex {
    my($domain) = @_;
    my($do_toc) = 0;
    if ($force == 1) {
	$do_toc = 1;
    }
    # check to see if we can write to this area
    #
    my($domain_p) = "$domainPathList{$domain}" . "/";
    if ($domainPathList{$domain} eq '' || !(-d $domain_p) || !(-w $domain_p)) {
	if($verbose == 1) {
	    print "Cannot write to designated area for $domain: $domain_p \n";
	}
	return;
    }
    my($domain_ttl) = &domain2title($domain);
    my($domain_dct) = "$domainPathList{$domain}/$domain.dct";
    # set this flag (file) when an operation should not be interrupted.
    # Such ops, if interrupted, could ruin the integrity of the database;
    # use this flag with caution.
    #
    $op_info = "$domainPathList{$domain}/_indexing";
    # if this file currently exists, the integrity of our database
    # may have been violated. In such a case, delete the master
    # and the local and begin FROM SCRATCH...
    #
    if(-e $op_info) {
	# inform if verbose or in instchk mode (to syslog)
	#
	if($verbose == 1) {
	    print "Building $domain from scratch; this may take a while\n";
	} else {
	    if($instchk == 1) {
		$msg = "Building $domain from scratch; this may take a while";
		system("logger -t sgindexAdmin \"$msg\"");
		die "logger failed: $!\n" if ($?);
	    }
	}
	$force = 1;
    }
    # A leftover local dictionary means an overlay exists, so the TOC
    # must be regenerated after cleanup.
    if (-e "$domainPathList{$domain}/local${domain}.dct") {
	$do_toc = 1;
    }
    &cleanDomainIndex($domain);
    # performs all phases for books
    #
    if ($domain eq "bks") {
	system("/usr/bin/touch $op_info");
	die "touch ($op_info) failed: $!\n" if ($?);
	my $bkopts = "";
	if ($verbose == 1) {
	    $bkopts .= " -v ";
	}
	if ($force == 1) {
	    $bkopts .= " -force ";
	}
	my($rp) = ($root ne "/" ? $root : '');
	my($r) = "${rp}/usr/share/Insight/library/SGI_bookshelves";
	system("$indexProg{$domain} $bkopts $r $output");
	die "$indexProg{$domain}: unable to index books: $!\n" if ($?);
	system("/usr/bin/rm -f $op_info");
	die "rm ($op_info) failed: $!\n" if ($?);
	system("/usr/bin/touch -amc $domain_dct");
	die "touch ($domain_dct) failed: $!\n" if ($?);
	return;
    }
    my($extraFile) = "$tmpdir/sgindex_extrafiles";
    my($missingFile)= "$tmpdir/sgindex_missing";
    $v = "";
    if($verbose == 1) {
	$v = " -v ";
    }
    # check for existing database, if not, we need to build a full one
    $m = "";
    if(-e $domain_ttl) {
	$m = " -db $domain_ttl -missing $missingFile ";
    }
    # build list of dirs to look using ":" to separate them
    #
    my(@p, $spath) = "";
    @p = split(/ /, $searchPath{$domain});
    foreach $_ (@p) {
	if ($_ ne "") {
	    if ($spath ne "") {
		$spath = $spath.":".$_;
	    }
	    else {
		$spath = $_;
	    }
	}
    }
    # run the alternate command to produce our list of extra/missing
    #
    $c = "$altProg{$domain} $m -extra $extraFile $v " .
	 "-regex $contentExpr{$domain} -path $spath ";
    $c .= " -root $root " if ($root ne "/");
    $c .= " 2>/dev/null";
    system($c);
    die "Unable to create list of files for indexing: $!\n" if ($?);
    # sgmerge this to delete entries found to be missing from the hard disk
    #
    if(-e $domain_ttl && -e $missingFile && -s $missingFile) {
	if($verbose==1) {
	    $oldfiles = getFileLines($missingFile);
	    print "Removing $oldfiles extra documents from $domain index...\n";
	}
	system("/usr/bin/touch $op_info");
	die "touch ($op_info) failed: $!\n" if ($?);
	system("$timex sgmerge -x -b $domain_ttl -m $missingFile $output");
	die "Failed to delete files from the $domain_ttl: $!\n" if ($?);
	print "Extra files removed from $domain_ttl index\n"
	    unless ($verbose == 0);
	system("/usr/bin/rm -f $op_info");
	die "rm ($op_info) failed: $!\n" if ($?);
	$do_toc = 1;
    }
    # now add in the extra files to the local* database; or create full one
    #
    if (-e $extraFile && -s $extraFile) {
	if($verbose==1) {
	    $newfiles = getFileLines($extraFile);
	    print "Collecting words from $newfiles unindexed files...\n";
	}
	$bLocal=0;
	$db_ttl = $domain_ttl;
	if(-e $domain_ttl) {
	    # create localfoo -- an overlay next to the master index
	    $db_ttl = $domain_ttl;
	    $db_ttl =~ s/$domain\.ttl/local$domain\.ttl/;
	    $bLocal = 1;
	} else {
	    if($verbose==1) {
		print "Forcing creation of new indices for $domain\n";
	    }
	}
	# Only guard a full (master) build with the _indexing flag;
	# interrupting an overlay build is recoverable.
	if($bLocal == 0) {
	    system("/usr/bin/touch $op_info");
	    die "touch ($op_info) failed: $!\n" if ($?);
	}
	unless(&buildIndex($domain, $indexProg{$domain}, $extraFile, $db_ttl)){
	    print STDERR "Unable to create an index: $domain\n";
	}
	if($bLocal == 0) {
	    system("/usr/bin/rm -f $op_info");
	    die "rm ($op_info) failed: $!\n" if ($?);
	}
	$do_toc = 1;
    } else {
	print "No files to add to $domain index\n"
	    unless ($verbose == 0);
    }
    if ($do_toc == 1) {
	# create the toc LAST
	#
	&makeDomainTOC($domain);
    }
    # Update the last modified time of the dct even if we
    # didn't need to modify the file just to "record" the
    # last time we updated. Needed for -instchk (the
    # date of this file will be compared to the inst
    # history file).
    #
    system("/usr/bin/touch -amc $domain_dct");
    die "touch ($domain_dct) failed: $!\n" if ($?);
}
#---------------------------------------------------
# instChkDomainUpdate($domain) # All if $domain=NULL
#
#---------------------------------------------------
# instChkDomainUpdate($domain) # All if $domain=NULL
#
# Compare each domain's .dct modification time against the inst
# history file (/var/inst/hist); when the history file is newer,
# software was installed since the last index run, so update that
# domain's index (logging start/end to syslog via logger).
# Returns immediately when there is no inst history file.
#
sub instChkDomainUpdate {
    my($arg) = @_;
    my($hist) = "/var/inst/hist";
    return unless(-e $hist);
    if ($arg ne '') {
	@domainList = ("$arg");
    }
    # NOTE(review): this list-assignment only initializes $dct_mtime;
    # harmless, since both are assigned before every use below.
    my($dct_mtime, $hist_mtime) = 0;
    my(@fstat) = ();
    @fstat = stat("$hist");
    $hist_mtime = ($fstat[9] + 0);	# element 9 of stat() is mtime
    foreach $domain (@domainList) {
	if( $domainPathList{$domain} eq '' ) {
	    next;
	}
	$indexttl = "$domainPathList{$domain}/${domain}.dct";
	if ((-e $indexttl) && (-w $indexttl)) {
	    # If the domain index files exists and it's older
	    # than the inst hist file, update the index.
	    #
	    @fstat = stat("$domainPathList{$domain}/${domain}.dct");
	    $dct_mtime = ($fstat[9] + 0);
	    if ($hist_mtime > $dct_mtime) {
		$startmsg =
		    "infosearch $domain index is out of date: updating";
		$endmsg = "infosearch $domain index update completed";
		system("logger -t sgindexAdmin \"$startmsg\"");
		die "logger failed" if ($?);
		&updateIndex($domain);
		system("logger -t sgindexAdmin \"$endmsg\"");
		die "logger failed" if ($?);
	    }
	}
    }
}
#----------------------------------------------------
# cleanDomainIndex($domainpath) # All if $domain=NULL
#
#----------------------------------------------------
# cleanDomainIndex($domainpath) # All if $domain=NULL
#
# Remove the local index files for one named domain, or for every
# configured domain when no argument is given.
#
sub cleanDomainIndex {
    my($arg) = @_;
    my(@targets) = ($arg eq '') ? @domainList : ($arg);
    foreach my $d (@targets) {
	&cleanAllIndexFiles($d);
    }
}
#----------------------------
# cleanAllIndexFiles($domain)
#
# Remove local files. If $force, remove all indices files
#
#----------------------------
# cleanAllIndexFiles($domain)
#
# Remove local files. If $force, remove all indices files
#
# Normally deletes only the local<domain>.* overlay files; under
# -force it also removes every three-letter index extension and all
# generated HTML in the domain's index directory.  The "_indexing"
# integrity flag is removed when cleaning (-clean) or forcing.
# No-op for domains without a resolved index directory.
#
sub cleanAllIndexFiles {
    my($d) = @_;
    if ($domainPathList{$d} eq '') {
	return;
    }
    print "Removing local index files and table of contents for ${d}\n"
	unless ($verbose == 0);
    $op = " ";
    if ($clean_info == 1 || $force == 1) {
	$op = "$domainPathList{$d}/_indexing";
    }
    if ($force == 1) {
	# "*.???" matches .ttl/.dct/.wrd (any 3-char extension).
	$localindex = "$domainPathList{$d}/*.??? " .
		      "$domainPathList{$d}/*.html";
    } else {
	$localindex = "$domainPathList{$d}/local${d}.???";
    }
    system("/usr/bin/rm -f $localindex $op");
    die "rm index files failed:\n$localindex : $!\n" if ($?);
}
#-------------------------------------------------
# makeDomainTOC($domainpath) # All if $domain=NULL
#
#-------------------------------------------------
# makeDomainTOC($domainpath) # All if $domain=NULL
#
# Rebuild the browsable HTML table of contents for one named domain,
# or for every configured domain when no argument is given.  Stale
# *.html pages are removed first; a fresh TOC is generated only when
# the domain's title file exists and is readable.
#
sub makeDomainTOC {
    my($arg) = @_;
    # The two original branches were copy/paste duplicates; fold them
    # into one loop.  This also fixes the single-domain branch, which
    # tested the stale global $domain (instead of $arg) when deciding
    # whether to print the "Cannot create" warning.
    my(@targets) = ($arg eq '') ? @domainList : ($arg);
    foreach my $d (@targets) {
	# Skip domains without a resolved index directory.
	next if ($domainPathList{$d} eq '');
	$ttl = &domain2title($d);
	# Remove any stale TOC pages before regenerating.
	$tocfiles = "$domainPathList{$d}/*.html";
	system("/usr/bin/rm -f $tocfiles");
	die "rm toc files failed:\n$tocfiles : $!\n" if ($?);
	if (-e $ttl && -r $ttl) {
	    &makeBrowseTOC($d);
	} else {
	    # "bks" legitimately has no title file until books are
	    # indexed, so stay quiet for it.
	    print "\nCannot create table of contents for $d\n"
		unless($verbose == 0 || $d eq 'bks');
	}
    }
}
#-----------------------
# makeBrowseTOC($domain)
#
# Create a browsable index from the search indices
#
#-----------------------
# makeBrowseTOC($domain)
#
# Create a browsable index from the search indices
#
# Runs the domain's TOCPROG with the title file as its argument;
# adds "-whatis" for the man domain during -instchk (to refresh the
# whatis database) and "-coll <id>" when readISConfig resolved a
# collection id.  No-op for domains without a TOCPROG (e.g. "bks").
#
sub makeBrowseTOC {
    my($domain) = @_;
    my($opts) = "";
    if ($tocProgramList{$domain} eq '') {
	return;
    }
    print "Building table of contents for $domain\n"
	unless ($verbose == 0);
    $opts = &domain2title($domain);
    if ($domain eq "man" && $instchk == 1) {
	$opts = "$opts -whatis";
    }
    $opts = $opts . " -coll $coll" if ($coll ne "");
    system("$timex $tocProgramList{$domain} $opts ");
    die "Failed to create a browsable index for $domain : $!\n" if ($?);
    print "Created a browsable index for $domain\n"
	unless($verbose == 0);
}
#-----------------------------------------------------------------------
# Boolean buildIndex($domain, $wordProg, $filesToIndex, $indexTitleFile)
#
# $wordProg: the program used to create the word files
#
#-----------------------------------------------------------------------
# Boolean buildIndex($domain, $wordProg, $filesToIndex, $indexTitleFile)
#
# $wordProg: the program used to create the word files
#
# Pipeline: run $wordProg over the file list to produce a .wrd word
# file, strip the -r root prefix from paths (so stored paths are
# root-relative), then run sgdict and sginverter to build the
# dictionary (.dct) and inverted index against the title file (.ttl).
# Deletes $filesToIndex and (unless -keepwrd) the .wrd file.
# Returns 1 on success; dies on any step failure.
#
sub buildIndex {
    my($domain, $wordProg, $filesToIndex, $index_ttl) = @_;
    # Extrapolate the other index file names from the title file
    #
    $index_base = $index_ttl;
    unless($index_base =~ s/\.ttl$//){
	&tmpFileCleanup();
	die "$index_ttl is not an index title file: $!\n";
    }
    $index_dct = "$index_base.dct";
    $index_wrd = "$index_base.wrd";
    $index_tmpwrd = "$index_base.wrd.tmp";
    # Create the word file from the source content
    #
    system("$timex $wordProg $filesToIndex > $index_wrd");
    die "Error: Failed to make a word file for local files: $!\n" if ($?);
    system("/usr/bin/rm -f $filesToIndex");
    die "rm ($filesToIndex) failed: $!\n" if ($?);
    unless($root eq "/"){
	# Escape "." and "/" so $root is treated literally by sed,
	# then strip the root prefix from every path in the word file.
	$rootTrans = $root;
	$rootTrans =~ s/\./\\\./g;
	$rootTrans =~ s/\//\\\//g;
	my($trans) = "cat $index_wrd | " .
		     "sed -e \"s/$rootTrans//\" > $index_tmpwrd";
	system($trans);
	die "Error: Failed to translate $index_wrd: $!\n" if ($?);
	&moveFile($index_tmpwrd,$index_wrd);
    }
    # Create an index
    #
    system("$timex sgdict $index_wrd $output");
    die "Error: Failed to create a dictionary file: $!\n" if ($?);
    $invertcmd = "$timex sginverter -x 5000000 -t $index_ttl" .
		 " -d $index_dct $index_wrd $output";
    system($invertcmd);
    die "Error: Failed to create an inverted index: $!\n" if ($?);
    # Cleanup
    #
    # NOTE(review): with -keepwrd the rm is skipped but the die still
    # checks $? -- harmless only because the preceding sginverter
    # system() must have left $? == 0 to reach this point.
    system("rm $index_wrd") unless ($keepwrd);
    die "rm ($index_wrd) failed: $!\n" if ($?);
    return 1;
}
#----------------------
# domain2title($domain)
#
#----------------------
# domain2title($domain)
#
# Return the full path of the domain's master title file (.ttl)
# inside its index directory.
#
sub domain2title {
    my($d) = @_;
    my($dir) = $domainPathList{$d};
    return("$dir/" . $d . ".ttl");
}
#---------------------------
# domain2localtitle($domain)
#
#---------------------------
# domain2localtitle($domain)
#
# Return the full path of the domain's local overlay title file
# (local<domain>.ttl) inside its index directory.
#
sub domain2localtitle {
    my($d) = @_;
    my($dir) = $domainPathList{$d};
    return($dir . "/local" . $d . ".ttl");
}
#----------------------
# redirectedPath($path)
#
# Return the path with the content root prepended
#
#----------------------
# redirectedPath($path)
#
# Return the path with the content root prepended
# (unchanged when $root is the default "/").
#
sub redirectedPath {
    my($path) = shift;
    return $path if ($root eq "/");
    return "$root$path";
}
#-----------------
# checkPath($path)
#
# Return 1 if the path is ok to process; avoid nastiness
#
#-----------------
# checkPath($path)
#
# Return 1 if the path is ok to process; avoid nastiness
# (refuse the filesystem root and current-dir spellings, since the
# caller later runs "rm -f" under this directory).
#
sub checkPath {
    my($candidate) = @_;
    foreach my $forbidden ("/", "//", ".", "./", "/.", ".//", "//.") {
	return 0 if ($candidate eq $forbidden);
    }
    return 1;
}
#------------------------
# int getFileLines($file)
#
# Return the number of lines in a file
#
#------------------------
# int getFileLines($file)
#
# Return the number of lines in a file
#
# Counts in pure Perl instead of shelling out to "wc -l" -- avoids
# the fork, avoids passing the (unquoted) filename through the shell,
# and returns 0 instead of garbage when the file cannot be opened.
#
sub getFileLines {
    my($file) = @_;
    my($filelines) = 0;
    local(*LINES);
    open(LINES, "< $file") || return 0;
    while (<LINES>) {
	$filelines++;
    }
    close(LINES);
    return($filelines);
}
#-------------------------------------------------
# void moveFile(String $srcFile, String $destFile)
#
# Move a file and report any errors
#
#-------------------------------------------------
# void moveFile(String $srcFile, String $destFile)
#
# Move a file and report any errors
#
sub moveFile {
    my($srcFile,$destFile) = @_;
    my($status) = system("/usr/bin/mv $srcFile $destFile");
    if ($status) {
	die "mv ($srcFile $destFile) failed: $!\n";
    }
}
#----------------------
# void tmpFileCleanup()
#
#----------------------
# void tmpFileCleanup()
#
# Remove this tool's scratch files ($tmpdir/sgindex_*).  Also
# installed as the INT/QUIT/ABRT signal handler, so it must stay
# safe to call at any point.
#
sub tmpFileCleanup {
    if ($verbose != 0) {
	print "Removing temp files: $tmpdir/sgindex_*\n";
    }
    system("/usr/bin/rm -f $tmpdir/sgindex_*");
    die "rm ($tmpdir/sgindex_*) failed: $!\n" if ($?);
}
# __END__ delimits the script from the data section
# The following data is read in by the "DATA" filehandle.
# This replaces the sgindexAdmin.config file.
__END__
DOMAIN = man
TOCPROG = /usr/lib/infosearch/bin/manTOC
INFOPATH = $MANPATH else /usr/share/catman /usr/share/man /usr/catman /usr/man
ALTPROG = /usr/lib/infosearch/bin/manAdmin
IDXPROG = /usr/sbin/sgreader -q -a TITLE -t -i
INFOREGEX = '*\.*'
DOMAIN = relnotes
TOCPROG = /usr/lib/infosearch/bin/relnotesTOC
INFOPATH = $RELNOTESPATH else /usr/relnotes
ALTPROG = /usr/lib/infosearch/bin/manAdmin
IDXPROG = /usr/lib/infosearch/bin/mkwordlist -i
INFOREGEX = '\( -name ch1.z -o -name ch01.z -o -name '*.gz' \)'
DOMAIN = bks
TOCPROG =
INFOPATH = /usr/share/Insight/library/SGI_bookshelves
ALTPROG =
IDXPROG = /usr/lib/infosearch/bin/booksAdmin
INFOREGEX =